# Set your working directory to where the raw data file is saved.
# All raw zebrabox data should be in the shared drive in the location listed below,
# so this should not need to be changed!
# setwd("//ressmb01.research.chop.edu/Falk_lab/zebrabox_data_analysis")
# List your file name within the apostrophes in the code below. Do not delete the .xls portion!
# raw_data <- read.delim("example_data/Example of difference in plate layout/20250107-094117.xls",
# fileEncoding = "UTF-16LE", stringsAsFactors = F)
raw_data <- read_tsv('simulated_zebrabox_data2.tsv')
# Same for the metadata her
# List your file name within the colons in the code below. Do not delete the .xls portion!
metadata <- readxl::read_excel('2025-07-10_simulated_metadata.xlsx',
skip = 1) %>%
mutate(box_used = as.character(box_used),
box_used = case_when(box_used == '1' ~ 'LocA',
box_used == '2' ~ 'LocB',
TRUE ~ NA_character_))
# list control group here, you must match spelling and capitalization exactly
control_group <- "WT"
# This code will clean the raw data into a better format for further analysis
raw_data %>%
mutate(time_min = start / 60, experiment_time_of_day = '',
period2 = ifelse(nchar(time_min) == 1, 1,
as.integer(str_extract(time_min, '^[0-9]')) + 1),
plate = str_extract(location, 'Loc[AB]'),
light = ifelse(period2 %% 2 != 0 & period2 != 1, 'dark', 'light')) %>%
separate(aname, into = c('well', 'name'), sep = '_') %>%
left_join(metadata, by = join_by(well, plate == box_used)) %>%
select(plate, treatment, well, time_min, light, period,
datatype, activity = actinteg, everything()) %>%
filter(!is.na(treatment), timebinid == 1) -> zebrabox_data
# This code creates a table of the cleaned data in the final report so you can easily download into excel if needed
# You can also use this table to quickly spot check your data and review specific fish/wells if needed
zebrabox_data %>%
DT::datatable(extensions = 'Buttons',
options = list(dom = 'Blfrtip',
buttons = c('copy', 'csv', 'excel'),
lengthMenu = list(c(10, 25, 50, -1),
c(10, 25, 50, "All"))),
caption = htmltools::tags$caption(style = 'caption-side: top; text-align: left; color:black; font-size:200% ;',
'Zebrabox'))
zebrabox_data %>%
separate(well, into = c('well_row', 'well_column'), sep = 1) %>%
filter(datatype == 'QuantizationSum', period2 == 3) %>%
ggplot(aes(x = well_column, y = well_row, text = treatment,
color = activity)) +
geom_point(size = 4) +
scale_color_continuous(limits = c(0, 7000)) +
scale_x_discrete(position = "top") +
scale_y_discrete(limits = rev) +
facet_wrap(~ plate, ncol = 2) +
labs(x = NULL, y = NULL, color = 'Activity\n1st Dark\nPeriod') +
theme_minimal() -> plate_map
ggplotly(plate_map)
### find the maximum activity to scale the graph
# if LocB is present, use the maximum summarized activity value, otherwise
# use LocA
zebrabox_data %>%
filter(startreason == 'Beginning of period', !is.na(activity)) %>%
filter(ifelse(str_detect(plate, 'LocB'), plate == 'LocB', plate == 'LocA')) %>%
filter(activity == max(activity)) %>%
select(activity) %>%
deframe() -> qc_all_act_max
# zebrabox all fish
zebrabox_data %>%
filter(startreason == 'Beginning of period') %>%
ggplot(aes(x = time_min, y = activity, color = well, group = well)) +
# add colored blocks to background based on light/dark cycle
# colored based on the "light" column
# xmin is the current time, xmax is the current time + 1 which makes boxes of
# width 1 minute
# ymax is 0 to the limits I set on the plot 10,000
# you will see in the documentation that you can use -Inf/Inf with geom_rect
# to fill up the entire plot area but this is not currently supported with
# plotly https://github.com/plotly/plotly.R/issues/1559
geom_rect(aes(fill = light, xmin = time_min,
xmax = time_min + 1, ymin = 0, ymax = qc_all_act_max + 100),
color = NA) +
scale_fill_manual(values = c('gray80', 'white')) +
geom_vline(xintercept = seq(0, max(zebrabox_data$time_min), 10), linetype = 'dashed', color = 'gray60') +
geom_point() +
geom_line() +
scale_x_continuous(breaks = seq(0, max(zebrabox_data$time_min), 10)) +
facet_wrap(~ plate, ncol = 1) +
coord_cartesian(ylim = c(0, qc_all_act_max)) +
labs(x = 'Time (min)', y = 'Activity', title = 'Zebrabox Individual Fish') +
theme_classic(base_size = 16) -> zebrabox_individual
ggplotly(zebrabox_individual)
### calculate average activity by treatment
zebrabox_data %>%
filter(startreason == 'Beginning of period') %>%
group_by(plate, treatment, light, time_min) %>%
summarize(mean_activity = mean(activity, na.rm = T)) %>%
ungroup() -> qc_condition_summary
### find the maximum activity to scale the graph
# if LocB is present, use the maximum summarized activity value, otherwise
# use LocA
qc_condition_summary %>%
filter(ifelse(str_detect(plate, 'LocB'), plate == 'LocB', plate == 'LocA')) %>%
filter(mean_activity == max(mean_activity)) %>%
select(mean_activity) %>%
deframe() -> qc_condition_max
### plot an interactive graph of summarized activity by treatment
qc_condition_summary %>%
ggplot(aes(x = time_min, y = mean_activity, color = treatment, group = treatment)) +
geom_rect(aes(fill = light, xmin = time_min,
xmax = time_min + 1, ymin = 0, ymax = qc_condition_max + 100), color = NA) +
scale_fill_manual(values = c('gray80', 'white')) +
geom_vline(xintercept = seq(0, max(zebrabox_data$time_min), 10), linetype = 'dashed', color = 'gray60') +
geom_point() +
geom_line() +
facet_wrap(~ plate) +
coord_cartesian(ylim = c(0, qc_condition_max)) +
labs(x = 'Time (min)', y = 'Activity', title = 'Zebrabox Summarized Conditions') +
theme_classic(base_size = 16) -> zebrabox_summarized
ggplotly(zebrabox_summarized)
Right now only removing fish if they are “dead” based on very low activity across the entire experiment, but this may change in the future. Fish are displayed in this table if their mean activity in the dark cycles is less than 10 across all periods but are not removed from downstream analysis.
# leaving previous filtering code for now as reference if we want to censor based on other behavior
# zebrabox_flagged %>%
# filter(startreason == 'Beginning of period') %>%
# group_by(condition, well) %>%
# summarize(low_activity = sum(low_activity),
# high_activity = sum(high_activity),
# light_no_freeze = sum(light_no_freeze),
# dark_no_swim = sum(dark_no_swim)) %>%
# ungroup() %>%
# filter(low_activity > 3 | high_activity > 3 | light_no_freeze > 3 |
# dark_no_swim > 3) %>%
# DT::datatable(extensions = 'Buttons',
# options = list(dom = 'Blfrtip',
# buttons = c('copy', 'csv', 'excel'),
# lengthMenu = list(c(10, 25, 50, -1),
# c(10, 25, 50, "All"))),
# caption = htmltools::tags$caption(style = 'caption-side: top; text-align: left; color:black; font-size:200% ;',
# 'Zebrabox'))
# removing fish if mean activity across all dark periods is less than 10
zebrabox_data %>% #distinct(startreason)
filter(datatype == 'QuantizationSum', !is.na(activity), light == 'dark') %>%
group_by(plate, treatment, well) %>%
summarize(mean_activity = mean(activity)) %>%
ungroup() -> mean_activity
# mean_activity %>%
# ggplot(aes(x = mean_activity)) +
# geom_histogram(bins = 20, fill = 'white', color = 'black') +
# labs(x = 'Mean Activity in Dark Cycles', y = 'Number of Wells') +
# theme_bw()
mean_activity %>%
filter(mean_activity < 10) %>%
DT::datatable(extensions = 'Buttons',
options = list(dom = 'Blfrtip',
buttons = c('copy', 'csv', 'excel'),
lengthMenu = list(c(10, 25, 50, -1),
c(10, 25, 50, "All"))),
caption = htmltools::tags$caption(style = 'caption-side: top; text-align: left; color:black; font-size:200% ;',
'Zebrabox'))
zebrabox_data %>%
filter(datatype == 'QuantizationSum', !is.na(activity)) %>%
group_by(treatment, light, well) %>%
summarize(mean_activity = mean(activity)) %>%
ungroup() %>%
filter(light == 'dark') %>%
ggplot(aes(x = treatment, y = mean_activity)) +
ggbeeswarm::geom_quasirandom() +
geom_boxplot(alpha = 0) +
labs(x = 'Condition',
y = 'Mean Activity Dark Cycles',
title = 'Unnormalized Activity') +
theme_bw()
zebrabox_data %>%
filter(datatype == 'QuantizationSum', !is.na(activity),
strain == control_group, light == 'dark') %>%
summarize(median(activity)) %>%
deframe() -> median_activity
zebrabox_data %>%
filter(datatype == 'QuantizationSum', !is.na(activity)) %>%
mutate(percent_change = ((activity - median_activity) / median_activity) * 100) %>%
group_by(treatment, light, well) %>%
summarize(mean_activity = mean(percent_change)) %>%
ungroup() %>%
filter(light == 'dark') %>%
ggplot(aes(x = treatment, y = mean_activity)) +
ggbeeswarm::geom_quasirandom() +
geom_boxplot(alpha = 0) +
labs(x = 'Condition',
y = 'Average Percent Change From Control Median\nin Dark Cycles',
title = 'Percent Change Activity') +
theme_bw()
zebrabox_data %>%
filter(datatype == 'QuantizationSum', !is.na(activity)) %>%
mutate(percent_change = ((activity - median_activity) / median_activity) * 100) %>%
group_by(treatment, light, well) %>%
summarize(mean_activity = mean(percent_change)) %>%
ungroup() %>%
filter(light == 'dark') %>%
rename(mean_percent_change = mean_activity) %>%
DT::datatable(extensions = 'Buttons',
options = list(dom = 'Blfrtip',
buttons = c('copy', 'csv', 'excel'),
lengthMenu = list(c(10, 25, 50, -1),
c(10, 25, 50, "All"))))
zebrabox_data %>%
filter(datatype == 'QuantizationSum', !is.na(activity)) %>%
mutate(period3 = ifelse(period2 %in% 1:2,
'acclimation', as.character(period2)),
period3 = factor(period3, levels = c('acclimation', '3', '4', '5',
'6', '7', '8', '9', '10'))) %>%
group_by(period3, treatment, light, well) %>%
summarize(mean_activity = mean(activity)) %>%
ungroup() %>%
ggplot(aes(x = period3, y = mean_activity, color = treatment)) +
annotate("rect", xmin = 1.5, xmax = 2.5, ymin = 0, ymax = Inf, fill = 'gray80') +
annotate("rect", xmin = 3.5, xmax = 4.5, ymin = 0, ymax = Inf, fill = 'gray80') +
annotate("rect", xmin = 5.5, xmax = 6.5, ymin = 0, ymax = Inf, fill = 'gray80') +
annotate("rect", xmin = 7.5, xmax = 8.5, ymin = 0, ymax = Inf, fill = 'gray80') +
geom_vline(xintercept = seq(1.5, 8.5, 1), linetype = 'dashed', color = 'gray60') +
geom_boxplot(alpha = 0) +
# ggbeeswarm::geom_quasirandom(position = position_dodge(width = .75)) +
geom_point(position = position_dodge(width = .75)) +
labs(x = 'Cycle',
y = 'Mean by Dark Cycle',
title = 'Unnormalized Activity') +
theme_bw() -> act_by_period
act_by_period
ggplotly(act_by_period)
zebrabox_data %>%
filter(datatype == 'QuantizationSum', !is.na(activity),
treatment == control_group) %>%
mutate(period3 = ifelse(period2 %in% 1:2,
'acclimation', as.character(period2)),
period3 = factor(period3, levels = c('acclimation', '3', '4', '5',
'6', '7', '8', '9', '10'))) %>%
group_by(period3) %>%
summarize(unique_name = median(activity)) %>%
ungroup() -> median_cycle_activity
zebrabox_data %>%
filter(datatype == 'QuantizationSum', !is.na(activity)) %>%
mutate(period3 = ifelse(period2 %in% 1:2,
'acclimation', as.character(period2)),
period3 = factor(period3, levels = c('acclimation', '3', '4', '5',
'6', '7', '8', '9', '10'))) %>%
left_join(median_cycle_activity, by = join_by(period3)) %>%
mutate(percent_change = ((activity - unique_name) / unique_name) * 100) %>%
group_by(period3, treatment, light, well) %>%
summarize(mean_activity = mean(percent_change)) %>%
ungroup() %>%
ggplot(aes(x = period3, y = mean_activity, color = treatment)) +
annotate("rect", xmin = 1.5, xmax = 2.5, ymin = -Inf, ymax = Inf, fill = 'gray80') +
annotate("rect", xmin = 3.5, xmax = 4.5, ymin = -Inf, ymax = Inf, fill = 'gray80') +
annotate("rect", xmin = 5.5, xmax = 6.5, ymin = -Inf, ymax = Inf, fill = 'gray80') +
annotate("rect", xmin = 7.5, xmax = 8.5, ymin = -Inf, ymax = Inf, fill = 'gray80') +
geom_vline(xintercept = seq(1.5, 8.5, 1), linetype = 'dashed', color = 'gray60') +
geom_hline(yintercept = 0, linetype = 'dashed', color = 'gray60') +
geom_boxplot(alpha = 0) +
geom_point(position = position_dodge(width = .75)) +
coord_cartesian(ylim = c(-200, 200)) +
labs(x = 'Cycle',
y = 'Percent Change From Control Median Activity',
title = 'Percent Change') +
theme_bw()
zebrabox_data %>%
filter(datatype == 'QuantizationSum', !is.na(activity)) %>%
mutate(period3 = ifelse(period2 %in% 1:2,
'acclimation', as.character(period2)),
period3 = factor(period3, levels = c('acclimation', '3', '4', '5',
'6', '7', '8', '9', '10'))) %>%
left_join(median_cycle_activity, by = join_by(period3)) %>%
mutate(percent_change = ((activity - unique_name) / unique_name) * 100) %>%
group_by(period3, treatment, light, well) %>%
summarize(mean_activity = mean(percent_change)) %>%
ungroup() %>%
rename(period = period3, percent_change = mean_activity) %>%
DT::datatable(extensions = 'Buttons',
options = list(dom = 'Blfrtip',
buttons = c('copy', 'csv', 'excel'),
lengthMenu = list(c(10, 25, 50, -1),
c(10, 25, 50, "All"))))
zebrabox_data %>%
filter(datatype == 'QuantizationSum', !is.na(activity)) %>%
mutate(period3 = ifelse(period2 %in% 1:2,
'acclimation', as.character(period2)),
period3 = factor(period3, levels = c('acclimation', '3', '4', '5',
'6', '7', '8', '9'))) %>%
group_by(period3, treatment, light, well) %>%
summarize(mean_activity = mean(activity)) %>%
ungroup() %>%
filter(light == 'dark') %>%
group_by(period3) %>%
nest() %>%
ungroup() %>%
mutate(test = map(data, ~ TukeyHSD(aov(mean_activity ~ treatment, data = .))),
class = map(test, ~ class(.))) %>%
unnest(c(class)) %>%
filter(class != 'try-error') %>%
mutate(test = map(test, ~ broom::tidy(.))) %>%
unnest(c(test)) %>%
select(period = period3,
groups_tested = contrast,
difference_means = estimate,
conf.low, conf.high, adj.p.value) %>%
mutate(difference_means = round(difference_means),
conf.low = round(conf.low),
conf.high = round(conf.high),
adj.p.value = round(adj.p.value, 4)) %>%
mutate(significant = ifelse(adj.p.value < 0.05, 'significant', 'not significant')) %>%
DT::datatable(extensions = 'Buttons',
options = list(dom = 'Blfrtip',
buttons = c('copy', 'csv', 'excel'),
lengthMenu = list(c(10, 25, 50, -1),
c(10, 25, 50, "All"))))
zebrabox_data %>%
filter(datatype == 'QuantizationSum', !is.na(activity)) %>%
group_by(treatment, light, period2, well) %>%
mutate(index = row_number()) %>%
nest() %>%
ungroup() %>%
mutate(period_peak = map(data, ~ as_tibble(pracma::findpeaks(.$activity, npeaks = 1)))) %>%
unnest(c(period_peak)) %>%
rename(peak_height = V1, peak_index = V2, curve_start_index = V3,
curve_end_index = V4) %>%
unnest(c(data)) %>%
mutate(max_peak = ifelse(index == peak_index, 'max', NA_character_)) -> zebrabox_data_w_peaks
# test with linear model
zebrabox_data_w_peaks %>%
filter(max_peak == 'max') %>%
distinct(treatment, period2, light, well, peak_height) %>%
filter(light == 'dark') %>%
group_by(treatment) %>%
nest() %>%
ungroup() %>%
mutate(test = map(data, ~ broom::tidy(lm(peak_height ~ period2, data = .)))) %>%
unnest(c(test)) %>%
filter(term != '(Intercept)') %>%
select(treatment, peak_height_change_over_time = estimate,
pvalue = p.value) %>%
mutate(peak_height_change_over_time = round(peak_height_change_over_time, 2),
pvalue = round(peak_height_change_over_time, 2)) %>%
DT::datatable(extensions = 'Buttons',
options = list(dom = 'Blfrtip',
buttons = c('copy', 'csv', 'excel'),
lengthMenu = list(c(10, 25, 50, -1),
c(10, 25, 50, "All"))),
caption = htmltools::tags$caption(style = 'caption-side: top; text-align: left; color:black; font-size:200% ;',
'Zebrabox'))
zebrabox_data_w_peaks %>%
filter(max_peak == 'max') %>%
distinct(treatment, period2, light, well, peak_height) %>%
filter(light == 'dark') %>%
ggplot(aes(x = period2, y = peak_height, color = treatment, group = well)) +
geom_point() +
geom_line() +
scale_x_continuous(breaks = c(3, 5, 7, 9)) +
labs(x = 'Condition',
y = 'Max Curve Height Dark Cycles',
title = 'Zebrabox') +
facet_wrap(~ treatment) +
theme_bw()
# zebrabox
zebrabox_data_w_peaks %>% #distinct(period) %>% mutate(period2 = str_remove(period, '^0+:')
group_by(treatment, well, light, period2) %>%
filter(index <= peak_index) %>% #filter(well == 'c1-004') %>% tail(1) %>% nest() %>% ungroup() %>% mutate(test = map(data, ~ .$activity)) %>% unnest(c(test))
nest() %>%
ungroup() %>%
mutate(len = map(data, ~nrow(.))) %>%
unnest(c(len)) %>%
mutate(period_start_slope = case_when(len == 1 ~ map(data, ~ .$activity), # maybe return 0 or NA?
len == 2 ~ map(data, ~ as.numeric(dist(select(.,
activity, time_min),
method = 'euclidean'))),
TRUE ~ map(data, ~ filter(broom::tidy(lm(activity ~ time_min, data = .)),
term == 'time_min')$estimate))) %>%
unnest(c(period_start_slope)) %>%
distinct(treatment, period2, light, well, period_start_slope) %>%
filter(light == 'dark') %>%
ggplot(aes(x = period2, y = period_start_slope, color = treatment, group = well)) +
geom_point() +
geom_line() +
scale_x_continuous(breaks = c(3, 5, 7, 9)) +
labs(x = 'Condition',
y = 'Starting Slope Dark Cycles',
title = 'Zebrabox') +
facet_wrap(~ treatment) +
theme_bw()
### just used a linear model
# zebrabox
zebrabox_data_w_peaks %>%
group_by(treatment, well, light, period2) %>%
filter(index >= peak_index) %>% #filter(well == 'c1-004') %>% tail(1) %>% nest() %>% ungroup() %>% mutate(test = map(data, ~ .$activity)) %>% unnest(c(test))
nest() %>%
ungroup() %>%
mutate(len = map(data, ~nrow(.))) %>%
unnest(c(len)) %>%
mutate(period_start_slope = case_when(len == 1 ~ map(data, ~ .$activity),
len == 2 ~ map(data, ~ as.numeric(dist(select(.,
activity, time_min),
method = 'euclidean'))),
TRUE ~ map(data, ~ filter(broom::tidy(lm(activity ~ time_min, data = .)),
term == 'time_min')$estimate))) %>%
unnest(c(period_start_slope)) %>%
distinct(treatment, period2, light, well, period_start_slope) %>%
filter(light == 'dark') %>%
ggplot(aes(x = period2, y = period_start_slope, color = treatment, group = well)) +
geom_point() +
geom_line() +
scale_x_continuous(breaks = c(3, 5, 7, 9)) +
labs(x = 'Condition',
y = 'Ending Slope Dark Cycles',
title = 'Zebrabox') +
facet_wrap(~ treatment) +
theme_bw()